**************************************************************************************************************
**	Replication do-file for "Government Policies and the Emergence of an Ethnic Dimension in Party Systems"
**	Author: Maayan Mor
**	Purpose: Create "state long.dta."  
*************************************************************************************************************
clear all
set more off

/* SET DIRECTORY */

/*********************************************
Manage the biographies 67-73 file. This
file was created with the "state wide" dataset.
The managed file is saved in long form
(one row per constituency-year) and is later
added at the bottom of the 48-67 data file.
**********************************************/

use "biographies 67-73.dta", clear

** Restrict to rows with seat==1 and round==1
keep if seat==1 & round==1

** One 0/1 indicator per election year: party "Z" in that year
foreach yr in 67 70 73 {
	gen center`yr' = (party=="Z" & year==`yr')
}

** Sum the indicators within each constituency
collapse (sum) center67 center70 center73, by(cst_k)

** Add information about district magnitude.
rename cst_k Kuehne_ID

/* Rows found only in the biographies file are discarded; matched rows and
rows found only in "state wide.dta" are kept.
Author's note: everything merges except the three Berlin districts.
Usually, I would have to collapse only Berlin and then calculate an average.
However, no Zentrum candidate had ever been elected in Berlin. */
merge 1:1 Kuehne_ID using "state wide.dta", keepusing(DM_t) ///
	keep(match using) nogenerate

** Wide (center67 center70 center73) -> long (year, center); then use the
** same variable name (p_) as the 49-67 file so the two can be appended
reshape long center, i(Kuehne_ID) j(year)
rename center p_

save "biographies 67-73_managed.dta", replace

/*************************************
Manage the biographies 49-67 file
*************************************/
import excel "Biographies 49-67.xlsx", sheet("biographies") firstrow clear
drop if hf_id==. /* empty rows */

rename relgion catholic

** An unknown constituency ("?") is coded 0 so the ID can be stored as a number
replace Kuehne_ID="0" if Kuehne_ID=="?"
destring Kuehne_ID, replace

**  Total number of Catholic legislators by constituency x legislative period
drop if t1==. // keep only those who were elected to the first session
sort hf_rb hf_cst LP
egen cst_lp = concat(Kuehne_ID LP), punct("_") // key of the form "<Kuehne_ID>_<LP>"
replace catholic="0" if catholic=="?" // unknown religion is counted as 0
destring catholic, replace
egen total_catholics = total(catholic), by(cst_lp)

** Total number of Center delegates by constituency x legislative period.
** First fold the listed party labels into "Z".
replace party="Z" if inlist(party, "KT later Z", "KT", "BKF/Z", "P later KT", "U/Z") | ///
					 inlist(party, "Z/U/Z", "KT (Rechte)", "Z/LIB", "Z/U")

gen byte center = (party=="Z")
egen total_center = total(center), by(cst_lp)

** Keep one row per constituency x legislative period (totals are constant
** within cst_lp, so it does not matter which row survives)
duplicates drop cst_lp, force
keep Kuehne_ID LP total_catholics total_center

rename (total_catholics total_center) (r_ p_)

** Legislative-period number -> two-digit year
recode LP (1=48) (2=49) (3=52) (4=55) (5=58) (6=61) (7=62) (8=63) (9=66)

**		The raw dataset includes only MPs that were Catholic or
**		were members of the Zentrum. Going wide and back to long creates
**		empty cells for constituencies and years that do not appear in the
**		raw data.
reshape wide r_ p_ , i(Kuehne_ID) j(LP)
reshape long r_ p_ , i(Kuehne_ID) j(year)

** Add the rest of the constituencies that had no Catholic/Center delegate
merge m:1 Kuehne_ID using "state wide.dta", keepusing(DM_t)

** Constituencies found only in "state wide.dta" arrive as a single row;
** replicate each into nine rows, one per legislative period
expand 9 if _merge==2

** Number the nine copies 1..9 within each such constituency
** (id stays missing for every row that is not _merge==2)
egen id_empty = group(Kuehne_ID) if _merge==2
bysort id_empty: gen id = _n if _merge==2

** Copy k of a constituency receives the k-th legislative-period year
local lp_years 48 49 52 55 58 61 62 63 66
forvalues k = 1/9 {
	local yr : word `k' of `lp_years'
	replace year = `yr' if id==`k'
}

drop _merge

** No recorded Center delegate -> zero rather than missing
replace p_ = 0 if p_==.

save  "biographies 49-67.dta", replace

** Stack the managed 67-73 constituency-years below the data in memory
append using  "biographies 67-73_managed.dta"

** Attach constituency-level variables from "state wide.dta".
** The data in memory hold several rows (years) per Kuehne_ID, while
** "state wide.dta" has one row per Kuehne_ID (the 1:1 and m:1 merges on
** Kuehne_ID earlier in this file would have aborted otherwise), so this is an
** m:1 merge. m:m is avoided: it pairs rows by position within each key and
** would silently produce a wrong dataset if the using file ever stopped
** being unique, whereas m:1 stops with an error in that case.
** DM_t already exists in memory; merge keeps the in-memory values for it.
merge m:1 Kuehne_ID using "state wide.dta", ///
	keepusing(cathp_64 DM DM_t province priest_vicar monastery) /*all merged */ 
drop _merge
	
** Two-digit election years -> four-digit years
recode year (48=1848)(49=1849) (52=1852) (55=1855) (58=1858) (61=1861) ///
(62=1862) (63=1863) (66=1866) (67=1867) (70=1870) (73=1873)

** Fix the row order so the saved file does not depend on how merge/sort
** happened to break ties
sort Kuehne_ID year

save "state long.dta", replace
